import HTMLParser
import urllib
import re


# Page that is downloaded and fed to the parser below.
urlString = "http://nixi.in/index.php?option=com_member&Itemid=43"
# Filled by Parse178.handle_data with the unique all-digit cell values,
# in the order they are first seen.
urlText = []
# Output file; it is opened in append mode, so earlier contents are kept.
pathOut = "ixp_members.txt"


class Parse178 (HTMLParser.HTMLParser):
    """Collect the numeric cells of the "ISP Members in Kolkata" table.

    The parser looks at the text of every <caption>. Once a caption equal to
    the target title is seen, the first text chunk of each following <td> is
    inspected until the enclosing </table>; chunks consisting only of digits
    are appended (without duplicates) to the module-level ``urlText`` list.
    """

    # Set to True while a <caption> is open, so its text can be inspected.
    check1 = False
    # Set to True once the caption of the wanted table has been seen;
    # cleared again at the closing </table>.
    ok = False
    # Set to True while a <td> of the wanted table is open.
    check2 = False

    # Caption text that identifies the table of interest.
    _TARGET_CAPTION = "ISP Members in Kolkata"
    # A cell is kept only if it consists entirely of digits.
    _NUMERIC_CELL = re.compile(r"^\d+$")

    def handle_starttag (self, tag, attrs):
        """Arm the caption / cell flags when the matching tag opens."""
        if tag == "caption":
            self.check1 = True
        if tag == "td" and self.ok:
            self.check2 = True

    def handle_endtag (self, tag):
        """Disarm the flags when their element closes.

        Without this, an empty <caption></caption> or <td></td> would leave
        the flag set and the next unrelated text chunk would be mistaken for
        the caption / cell content.
        """
        if tag == "caption":
            self.check1 = False
        elif tag == "td":
            self.check2 = False
        elif tag == "table" and self.ok:
            self.ok = False
            # Also covers malformed markup where </td> is missing.
            self.check2 = False

    def handle_data (self, data):
        """Inspect a text chunk according to the currently armed flag.

        Surrounding whitespace is ignored, so captions and cells that are
        formatted across several lines are still recognised and the stored
        values never carry a trailing newline.
        """
        text = data.strip()
        if self.check1:
            if text == self._TARGET_CAPTION:
                self.ok = True
            # Only the first text chunk of a caption is considered.
            self.check1 = False
        if self.check2:
            if self._NUMERIC_CELL.match(text) and text not in urlText:
                urlText.append(text)
            # Only the first text chunk of a cell is considered.
            self.check2 = False


# Download the page and run it through the parser; matching values
# accumulate in the module-level urlText list.
lparser = Parse178()
response = urllib.urlopen(urlString)
try:
    lparser.feed(response.read())
finally:
    # Release the connection even if reading or parsing fails.
    response.close()

# Append one "178 <value>" line per collected value. The with-block closes
# the file even if a write raises.
with open(pathOut, "a") as fileOut:
    for item in urlText:
        fileOut.write("178 %s\n" % item)